import logging

import re
import json
import tqdm
import scrapy
from scrapy import signals
from scrapy.http import JsonRequest, Request
from scrapy.spidermiddlewares.httperror import HttpError
from twisted.internet.error import TimeoutError, TCPTimedOutError

from ..items import AminerItem, IndicesItem, ProfileItem
from ..items import TagItem, ExpertItem

from ..utils.query_mgr import QueryManager


class IclrSpider(scrapy.Spider):
    """Crawl AMiner person-search results and the matching profile pages.

    For each query handed out by ``QueryManager`` the spider pages through the
    ``searchapi.SearchPerson`` endpoint, yields one ``AminerItem`` per person
    in the response and schedules a request for that person's profile page,
    which ``parse_expert`` turns into an ``ExpertItem``.

    Search pages are requested strictly one after another: each ``parse``
    call schedules the next page (or the first page of the next query).

    Spider arguments (pass with ``-a``):
        queries: first argument given to ``QueryManager``
            (presumably the file listing the queries to run -- confirm).
        query_done: second argument given to ``QueryManager``
            (presumably the file recording finished queries -- confirm).
    """

    name = 'iclr'
    # Profile pages live on www.aminer.cn, the search API on apiv2.aminer.cn.
    # Every request below also sets dont_filter=True; listing both hosts keeps
    # the declaration consistent with what the spider actually fetches.
    allowed_domains = ['www.aminer.cn', 'apiv2.aminer.cn']
    # Web UI counterpart of the API call: https://www.aminer.cn/search/person?t=b&q=<query>
    start_urls = ['https://apiv2.aminer.cn/n?a=__searchapi.SearchPerson___']

    headers = {
        "Accept": "application/json",
        # "Authorization": JWT token, get from login
        "Content-Type": "application/json",
    }

    # Template of the JSON body sent to the search API. It is never modified:
    # build_request() copies it and overrides query / offset / size.
    req_payload = [
        {
            "action": "searchapi.SearchPerson",
            "parameters": {
                "offset": 0,
                "size": 20,
                "query": "集成电路",
                "include": ["agg", "intelli", "topics"],
                "aggregation": ["gender", "h_index", "nation", "lang"]},
            "schema": {
                "person": ["id", "name", "name_zh", "avatar", "tags", "is_follow", "num_view", "num_follow",
                           "is_upvoted", "num_upvoted", "is_downvoted", "bind",
                           {"profile": ["position", "position_zh", "affiliation", "affiliation_zh", "org"]},
                           {"indices": ["hindex", "gindex", "pubs", "citations", "newStar", "risingStar", "activity",
                                        "diversity", "sociability"]}, "tags_translated_zh"]
            }
        }
    ]

    # Matches the assignment prefix of the JSON blob embedded in a profile page.
    _INITIAL_PROPS_RE = re.compile(r"window\.g_initialProps\s*=\s*")

    # Despite its name this holds ``pager.total`` of the current query, i.e.
    # the number of results (not pages); -1 means "not known yet".
    tot_pages = -1
    offset = 0  # offset of the next search page to request
    size = 20  # number of results requested per search page
    progress = None  # tqdm bar of the query currently being crawled
    query_mgr = None  # QueryManager, created in spider_opened()

    def __init__(self, name=None, queries="queries.txt", query_done="query_done.txt", **kwargs):
        """Store the two ``QueryManager`` paths; see the class docstring."""
        super(IclrSpider, self).__init__(name, **kwargs)
        self.logger.setLevel(logging.INFO)
        # use '-a' option to add parameter
        self.queries_file = queries
        self.query_done_file = query_done

    @classmethod
    def from_crawler(cls, crawler, *args, **kwargs):
        """Create the spider and hook it to the opened / closed signals."""
        spider = super(IclrSpider, cls).from_crawler(crawler, *args, **kwargs)
        crawler.signals.connect(spider.spider_opened, signals.spider_opened)
        crawler.signals.connect(spider.spider_closed, signals.spider_closed)
        return spider

    def spider_opened(self):
        """Signal handler: create the ``QueryManager`` for this run."""
        self.logger.info("spider %s opened.", self.name)
        self.query_mgr = QueryManager(self.queries_file, self.query_done_file)

    def spider_closed(self, reason):
        """Signal handler: release the progress bar if a query was interrupted."""
        self._close_progress()

    def _close_progress(self):
        """Close and forget the current tqdm bar, if there is one."""
        if self.progress is not None:
            self.progress.close()
            self.progress = None

    def _finish_current_query(self):
        """Reset the paging state and report the current query as done."""
        self.tot_pages = -1
        self.offset = 0
        self._close_progress()
        self.query_mgr.query_done(self.query_mgr.next_query())

    @staticmethod
    def _fill_item(item, source):
        """Copy into ``item`` every entry of ``source`` whose key is a declared field.

        ``source`` may be ``None`` or missing entirely; ``item`` is returned
        so the call can be used inline.
        """
        for key, value in (source or {}).items():
            if key in item.fields:
                item[key] = value
        return item

    def build_request(self):
        """Build the request for the next search page.

        Returns:
            A ``JsonRequest`` for the current query at ``self.offset``, or
            ``None`` when ``QueryManager.next_query()`` returns ``None``; in
            that case the engine is asked to close the spider.

        NOTE(review): paging relies on ``next_query()`` returning the same
        query until ``query_done()`` is called -- confirm against QueryManager.
        """
        query = self.query_mgr.next_query()
        if query is None:
            self.logger.info("All queries have done.")
            self.crawler.engine.close_spider(self, "All queries have done.")
            return None

        # Work on a copy so the class-level template is never mutated.
        template = self.req_payload[0]
        parameters = dict(template["parameters"], query=query, offset=self.offset, size=self.size)
        payload = [dict(template, parameters=parameters)]

        return JsonRequest(self.start_urls[0],
                           data=payload,
                           callback=self.parse,
                           errback=self.error_callback,
                           headers=self.headers,
                           dont_filter=True)

    def start_requests(self):
        """Yield the first search request, or nothing if there is no query."""
        request = self.build_request()
        if request is not None:
            yield request

    def error_callback(self, failure):
        """Log a failed request, with extra detail for HTTP and timeout errors.

        The failed request is not retried here and no follow-up request is
        scheduled from this errback.
        """
        self.logger.error("error: %r", failure)

        if failure.check(HttpError):
            # these exceptions come from HttpError spider middleware
            # you can get the non-200 response
            response = failure.value.response
            self.logger.error('HttpError on %s', response.url)

        elif failure.check(TimeoutError, TCPTimedOutError):
            request = failure.request
            self.logger.error('TimeoutError on %s', request.url)

    def parse(self, response):
        """Handle one page of search results.

        Yields an ``AminerItem`` and a profile-page request per person, then
        the request for the next page (or for the next query once the current
        one is exhausted). A query counts as exhausted when the offset has
        reached ``pager.total`` or the page contained no persons.

        Trimmed sample of the JSON this method consumes::

            {"data": [{
                "pager": {"total": 1000},
                "persons": [{
                    "id": "53f324cadabfae9a8446ac07",
                    "name": "Stephen M. Trimberger",
                    "avatar": "https://static.aminer.org/upload/avatar/....jpeg",
                    "num_followed": 0, "num_upvoted": 0, "num_viewed": 51,
                    "profile": {"affiliation": "Xilinx", "position": "Fellow"},
                    "indices": {"hindex": 33, "gindex": 57, "pubs": 163,
                                "citations": 3443, "activity": 0, ...},
                    ...
                }, ...],
                "aggregation": [...], "intellResults": {...},
                "tags": {"tags": [...], "trans": [...]}, "topics": [...],
                "succeed": true, ...
            }]}

        ``name_zh``, ``avatar``, the ``num_*`` counters, ``profile`` and
        ``indices`` are absent for some persons in that sample, hence the
        defaults below.
        """
        self.logger.debug("%r", response)
        result = response.json()['data'][0]

        total = result['pager']['total']
        if self.tot_pages < 0:
            # First page of a new query: remember the result count, open a bar.
            self.tot_pages = total
            page_count = -(-total // self.size)  # ceiling division
            self.logger.info("total pager is %d", page_count)
            self._close_progress()
            self.progress = tqdm.tqdm(total=total, desc="keyword: %s" % self.query_mgr.next_query())

        persons = result.get('persons') or []
        for pobj in persons:
            person = AminerItem()
            person['id'] = pobj['id']
            person['name'] = pobj['name']
            person['name_zh'] = pobj.get("name_zh", "")
            person['avatar'] = pobj.get("avatar", "")
            person['num_followed'] = pobj.get("num_followed", 0)
            person['num_viewed'] = pobj.get("num_viewed", 0)
            person['num_upvoted'] = pobj.get("num_upvoted", 0)
            person['profile'] = [self._fill_item(ProfileItem(), pobj.get('profile'))]
            person['indices'] = [self._fill_item(IndicesItem(), pobj.get('indices'))]

            yield person

            # get person profile
            yield self.build_expert_request(person['name'], person['id'])

            self.progress.update(1)

        self.offset += self.size

        # An empty page means the API has nothing more to serve for this
        # query, even if pager.total promised more results.
        if self.offset >= self.tot_pages or not persons:
            self._finish_current_query()

        next_request = self.build_request()
        if next_request is not None:
            yield next_request

    def build_expert_request(self, name, id):
        """Build the request for a person's profile page.

        Args:
            name: display name; lower-cased, stripped of non-letters and
                joined with ``-`` to form the URL slug.
            id: AMiner person id appended after the slug.
        """
        cleaned = ("".join(ch for ch in word if ch.isalpha()) for word in name.lower().split())
        # Skip words that contained no letters so the slug never gets
        # doubled or trailing hyphens.
        slug = "-".join(word for word in cleaned if word)

        url = "https://www.aminer.cn/profile/{name}/{id}".format(name=slug, id=id)
        # Original note (translated): the header contains an Authorization
        # part that has to be filled with a JWT. No header is sent here.

        return Request(url, callback=self.parse_expert, errback=self.error_callback, dont_filter=True)

    def parse_expert(self, response):
        """Extract an ``ExpertItem`` from a profile page.

        The data is read from the JSON value assigned to
        ``window.g_initialProps`` in the page source, under the keys
        ``profile`` -> ``profile``. Pages where that value is missing or
        cannot be decoded are logged and skipped.
        """
        self.logger.debug("profile info: %r", response.text)

        text = response.text
        match = self._INITIAL_PROPS_RE.search(text)
        if match is None:
            self.logger.error("window.g_initialProps not found on %s", response.url)
            return

        try:
            # raw_decode stops at the end of the first JSON value, so whatever
            # follows it (";", "</script>", more script code) is ignored.
            props, _ = json.JSONDecoder().raw_decode(text[match.end():])
            obj = props['profile']['profile']
        except (ValueError, KeyError, TypeError) as exc:
            self.logger.error("cannot read profile data on %s: %r", response.url, exc)
            return

        expert = ExpertItem()
        expert['id'] = obj['id']
        expert['url'] = response.url
        expert['name'] = obj['name']
        expert['name_zh'] = obj.get('name_zh', "")
        expert['nation'] = obj.get('nation', "")
        expert['profile'] = [self._fill_item(ProfileItem(), obj.get('profile'))]
        # Tags and their scores are parallel lists; without both there is
        # nothing to pair up and the list stays empty.
        expert['tags'] = [
            TagItem(tag=tag, score=score)
            for tag, score in zip(obj.get('tags') or [], obj.get('tags_score') or [])
        ]

        yield expert





